from lxml import etree

# First approach: parse a local HTML file straight from disk.
# An HTMLParser configured for UTF-8 is handed to etree.parse together
# with the file path; the parsed result is printed for inspection.
_local_html_path = './豆瓣.html'
parser = etree.HTMLParser(encoding='utf-8')
tree = etree.parse(
    _local_html_path,
    parser=parser,
)
print(tree)

# Second approach: read the file's text ourselves and pass the string to
# etree.HTML. The file is opened in a `with` block so the handle is closed
# as soon as the text has been read, instead of being left open until the
# interpreter happens to garbage-collect it.
with open("豆瓣.html", "r", encoding="utf-8") as html_file:
    tree2 = etree.HTML(html_file.read())
print(tree2)


# The key to XPath is working out the path to the node you want.
# All queries below run against the tree built from the file's text.
_select = tree2.xpath

# Absolute path: every step from the root is spelled out, all the way
# down to the text of the target <a> elements.
text = _select("/html/body/div/div/div/a/text()")
print(text)

# A leading "//" searches the whole document: text of every <a> tag.
text2 = _select("//a/text()")
print(text2)

# "//" in the middle of a path means "any descendant" (children,
# grandchildren, and so on) of the node matched so far.
text3 = _select("/html/body/div/div/div//a/text()")
print(text3)


